package jupitermouse.site.keytuple;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.util.Collector;

import java.util.stream.Stream;

/**
 * Batch word count that keys the grouped data set by a POJO field name.
 *
 * <p>Reads a text file line by line, lower-cases each line, splits it into
 * whitespace-separated tokens, wraps every token in a {@code WC} with an initial
 * count of 1, groups the records by their {@code word} field and prints the
 * aggregated result.
 *
 * <p>Open question left by the original author: in the DataSet API the
 * {@code sum} aggregation only accepts an {@code int} field position.
 */
public class PojoBatchWCJavaApp {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "file:///E:\\workroom\\learn\\flink\\demo_flink\\doc\\word.txt";

    /**
     * Builds and runs the word-count job.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the job cannot be built or executed
     */
    public static void main(String[] args) throws Exception {

        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;

        // set env
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // read data
        DataSet<String> dataSource = env.readTextFile(inputPath);

        // transform
        dataSource.flatMap(new FlatMapFunction<String, WC>() {
            @Override
            public void flatMap(String s, Collector<WC> collector) throws Exception {
                // Split on runs of whitespace and drop empty tokens so that repeated,
                // leading or trailing blanks are not counted as a "word".
                Stream.of(s.toLowerCase().split("\\s+"))
                        .filter(token -> !token.isEmpty())
                        .forEach(token -> collector.collect(new WC(token, 1)));
            }
        })
                .groupBy("word")
                // NOTE(review): sum(int) aggregates by field position, which Flink defines
                // for Tuple types. WC is not visible here; if it is a plain POJO this call
                // is expected to be rejected when the plan is built -- confirm against WC's
                // definition and, if so, replace it with a reduce(...) that adds the counts.
                .sum(1)
                // print() is an eager sink: it triggers execution of the job itself, so no
                // env.execute() may follow (it would fail because no new sink was defined).
                .print();
    }

}


